rm(list = ls())
options(warn=-1)
#------------------------------------------------------------------------------------------------------------
library(logr)
# Open log
lf <- log_open("03_empirical_app_manuscript.log")
setwd('~/Dropbox/Research/compliance_blocking/replication/GOTV')
#------------------------------------------------------------------------------------------------------------
load('Data/generated/gotv_cleaned.Rdata')
source('Code/helper_functions.R')
#------------------------------------------------------------------------------------------------------------
#Load libraries
#------------------------------------------------------------------------------------------------------------
library(AER)
library("quickblock")
library(estimatr)
library(tidyverse)
library(Matching)
ggMelody<-theme_minimal() + theme(plot.title = element_text(hjust = 0.5, size = 19, face = "bold"),
                             axis.text=element_text(size=18), #size = 12
                             axis.text.x = element_text(size=16), #size = 12
                             axis.text.y = element_text(size=16), #size = 12
                             legend.text=element_text(size=14),
                             legend.position='bottom', axis.title = element_text(size = 14),
                             strip.text.x = element_text(size = 16, face='bold'),
                             strip.text.y = element_text(size = 12, face='bold'),
                             plot.subtitle=element_text(size = 14, hjust=0.5))
theme_set(ggMelody)
#------------------------------------------------------------------------------------------------------------
log_print(sessionInfo())
#------------------------------------------------------------------------------------------------------------
covariates = c("age", "turf", "voted99", "voted00", "missing99", "famsize", "primary",
    "missing_primary", "age18", "age35", "age50", "age65", "age2")

df_all = df

#Summaries:
df_all %>% filter(T==1) %>%  summarize(mean(C))
#By city breakdown: (for page 17, "The overall compliance rate in the encouragement group was 29%,
#with compliance rates in individual sites ranging from 14% (Columbus) to 45% (Raleigh)."")
df_all %>% filter(T==1) %>% group_by(city) %>% summarize(mean(C))

cities = unique(df_all$city)
#--------------------------------------------------------------------------------------------------
load('Data/generated/results_matching.Rdata')
results = rbind(
    data.frame(blocking_set = "Full", cities, lapply(block_set1, function(x) return(x$summary)) %>% bind_rows()),
    data.frame(blocking_set = "Restricted", cities, lapply(block_set2, function(x) return(x$summary)) %>% bind_rows())
    )

#TOTAL MATCHED UNITS:
#(for page 17-18: "[We] are left with 17,442 matched units, where matched pairs constitute the blocks in our analyses"):
sum(lapply(block_set1, function(x) return(x$n_units)) %>% unlist())
#--------------------------------------------------------------------------------------------------
#FIGURE 4:
df_points<-results[,-which(names(results) %in% c("at", "iv_se", "psw_se", "iv_block_se",
                                                       "bdim_se", "psw_block_se", "psw_full", "psw_full_se"))]

df_se<-results[,which(names(results) %in% c("blocking_set", "cities", "iv_se", "psw_se", "iv_block_se",
                                                       "bdim_se", "psw_block_se"))]

names(df_se)<-names(df_points)
df_points<-df_points %>% gather(key = Estimator, value = Estimate, -c(blocking_set, cities))
df_se<-df_se %>% gather(key = Estimator, value = SE, -c(blocking_set, cities))
df_plot<-left_join(df_points, df_se)

#Re-code:
df_plot$Estimator[which(df_plot$Estimator=="iv")]<-"IV"
df_plot$Estimator[which(df_plot$Estimator=="psw")]<-"PSW"
df_plot$Estimator[which(df_plot$Estimator=="iv_block")]<-"IV (Block)"
df_plot$Estimator[which(df_plot$Estimator=="bdim")]<-"Block DiM"
df_plot$Estimator[which(df_plot$Estimator=="psw_block")]<-"PSW (Block)"
df_plot$Estimator<-factor(df_plot$Estimator, levels=c("IV", "IV (Block)", "Block DiM", "PSW", "PSW (Block)"))
pl1 = c("darkorange2", "#ffc14d", "#3dbbf5", "#00d0ca", "#88d575")

df_plot %>% filter(blocking_set=='Full') %>%
ggplot(aes(x = Estimator, y = Estimate, color=Estimator, group=Estimator)) +
geom_point(position=position_dodge(width=0.6), size=5) +
geom_linerange(aes(x = Estimator, ymin = Estimate-1.96*SE, ymax = Estimate+1.96*SE), size=1,
    position=position_dodge(0.6))+
facet_wrap(~cities, nrow=2)+ylab("Estimate of CACE")+
scale_colour_manual(values=pl1)+
ggtitle("Comparison of CACE Estimates")+
geom_hline(yintercept=0, linetype='dashed')+xlab("")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position='none')
ggsave("../Figures/fig4_gotv_estimates.pdf", height=6.5, width=9.5)

#---------------------------------------------------------------------------------------------------------
#TABLE 2:
rel_reduc = results %>%
group_by(blocking_set, cities) %>%
summarize(
        iv_block = 100*(iv_se-iv_block_se)/iv_se,
        psw = 100*(iv_se-psw_se)/iv_se,
        psw_block = 100*(iv_se - psw_block_se)/iv_se,
        bdim = 100*(iv_se - bdim_se)/iv_se)

#Pg 18: PSW relative reduction
rel_reduc %>% filter(blocking_set == "Full") %>% summarize(mean(psw))

#Pg 19: IV Block relative reduction
rel_reduc %>% filter(blocking_set == "Full") %>% summarize(mean(iv_block))

log_print(xtable::xtable(rel_reduc[1:6,-1], digits=1), include.rownames=FALSE)

log_close()